keep = ls()

#######################################
#Load CWDB soldiers and Matching Links#
#######################################

cwdb_soldiers = fread("./data/indiana_soldiers/cleaned/cwdb_to_match.csv", na.strings = c("NA", ""))
cwdb_1874_matches = fread("./data/indiana_soldiers/cleaned/cwdb_to_1874_links.csv")
cwdb_census_matches = fread("./data/indiana_soldiers/cleaned/cwdb_to_census_links.csv") %>% unique

##########################
#Load CWDB Treatment Data#
##########################

in_veterans_treatment = fread('./data/indiana_soldiers/cleaned/in_cwdb_treatment_data.csv')
in_veterans_treatment[, indate := as.Date(indate)]

########################
#Load Census Covariates#
########################

census_cov = fread('./data/indiana_soldiers/cleaned/census_indiana_covariates.csv')

#############################
#Load predicted partisanship#
#############################

census_pred = fread('./data/indiana_soldiers/cleaned/census_predicted_partisanship.csv')

setkey(cwdb_census_matches, mpcid)
setkey(census_pred, mpcid)

#Merge predicted partisanship to soldiers, 
#take average predicted partisanship
census_pred = census_pred[cwdb_census_matches] %>%
            .[, lapply(.SD, mean), 
              by = personpk, 
              .SDcols = paste0('prob_', 0:3)]



####################
#Make Analysis Data#
####################

baseTable = cwdb_soldiers[, list(personpk, census_county, use_quality)
                                     ] %>% unique

#Merge in Census Links
setkey(baseTable, personpk, census_county)
setkey(cwdb_census_matches, personpk, census_county)
baseTable = cwdb_census_matches[baseTable] %>% 
            .[, list(personpk, census_county, use_quality,
                      any_census = any(!is.na(mpcid))), 
                      by = personpk] %>% unique

#Merge in 1874 Directories
cwdb_1874_matches = cwdb_1874_matches[(use), list(personpk,
                         id_1874 = str_extract(image_url, "(?<=[/])[^./]+(?=\\.png$)"),
                         party,
                         gamma.1, gamma.2, gamma.3, gamma.4, gamma.5, birth_dist)] %>% 
                    unique

cwdb_1874_matches[, census_county := id_1874 %>% 
                                   str_extract("^[a-z]+") %>%
                                   str_to_title()]

#Calculate match quality, overall score
cwdb_1874_matches[, score := gamma.1 + gamma.2 + gamma.3 + gamma.4 + gamma.5 + ifelse(!is.na(birth_dist), 2*(birth_dist < 2) + (birth_dist < 4), 0)]
#Identify best match
cwdb_1874_matches[, best := score == max(score) , by = list(personpk, census_county)]
#Identify minimum match quality
cwdb_1874_matches[, match_q_1874 := (gamma.1 %in% 2 & gamma.2 %in% 2) |
                                (gamma.1 %in% 2 & (gamma.2 %in% 1 | gamma.4 %in% 2)) |
                                ((gamma.1 %in% 1 | gamma.3 %in% 2) & gamma.2 %in% 2) |
                                (gamma.5 %in% 2 & gamma.2 %in% 2)]

setkey(baseTable, personpk, census_county)
setkey(cwdb_1874_matches, personpk, census_county)

cwdbTable = cwdb_1874_matches[baseTable] %>% unique
cwdbTable = cwdbTable[, list(personpk, census_county, use_quality,
                             matched_census = any_census, 
                             matched_1874 = !is.na(id_1874),
                             id_1874, party, best, match_q_1874, score,
                             gamma.1, gamma.2, gamma.3, gamma.4, gamma.5, birth_dist
                             )] %>% 
            unique

#Merge in soldier birth year:
cwdb_by = cwdb_soldiers[, list(personpk, birth_year)] %>% unique
setkey(cwdb_by, personpk)
setkey(cwdbTable, personpk)
cwdbTable = cwdb_by[cwdbTable]

#Merge in soldier predicted partisanship:
setkey(cwdbTable, personpk)
setkey(census_pred, personpk)
cwdbTable = census_pred[cwdbTable]


############################
#Create census balance data#
############################

#Because some soldiers matched to multiple census persons
#Randomly sample which census match to use

setkey(census_cov, mpcid)
setkey(cwdb_census_matches, mpcid)
set.seed(10403)
census_balance = census_cov[cwdb_census_matches] %>% 
      .[, use := sample(rep(c(T,F), c(1, .N-1))), by = list(personpk, census_county)] %>%
      .[(use)]

#Create balance variables
census_balance = census_balance[, list(birth_place_clean, 
                                       c_birth_year = birth_year,
                      school = school %in% "Yes", 
                      illiterate = illiterate %in% "Y", 
                      hh_head, 
                      hh_kids, 
                      l_hh_real_estate = log(hh_real_estate + 1) , 
                      l_hh_personal_estate = log(hh_personal_estate + 1), 
                      owns_property = hh_real_estate > 0,
                      married,
                      hh_size = hh_comp_str %>% str_count(" "),
                      hh_ma_males = hh_comp_str %>% 
                                      str_extract_all("Male_\\d{1,2}") %>% 
                                      unlist %>%
                                      str_extract("\\d+") %>%
                                      as.numeric %>% 
                                      `>=` (13) %>% 
                                      sum,
                      hh_age_heaping = hh_comp_str %>% 
                                       str_extract_all("\\d+") %>%
                                       unlist %>%
                                       as.numeric %>% 
                                       `%%` (5) %>%
                                       `==` (0) %>%
                                        sum
                      ), by = list(personpk, census_county)]

#Create Census: region of birth
south = c("Alabama", "Arkansas", "Florida", "Georgia", "Louisiana", "North Carolina", "South Carolina", "Tennessee", "Virginia")
border = c("Delaware", "Kentucky", "Maryland", "Missouri", "District of Columbia")
north = setdiff(state.name, c(border, south))
britain = c("England", "Canada", "Scotland", "Wales")
ireland = "Ireland"
germany = c("Germany", "Austria", "Switzerland")

census_balance[, birth_place_group := "Other"]
census_balance[birth_place_clean %in% south, birth_place_group := "US South"]
census_balance[birth_place_clean %in% border, birth_place_group := "US Border"]
census_balance[birth_place_clean %in% north, birth_place_group := "US North"]
census_balance[birth_place_clean %in% britain, birth_place_group := "Britain"]
census_balance[birth_place_clean %in% ireland, birth_place_group := "Ireland"]
census_balance[birth_place_clean %in% germany, birth_place_group := "Germany"]


###########
#Create DV#
###########

cwdbTable[, party_rep := str_detect(party, "([rR]ep)|(Union)|(Radical)|(Freesoiler)")]
cwdbTable[, party_dem := str_detect(party, "[Dd]em")]
cwdbTable[is.na(party_rep), party_rep := F]
cwdbTable[is.na(party_dem), party_dem := F]
cwdbTable[, prob_rep := prob_3]
cwdbTable[, prob_dem := prob_0]
cwdbTable[, prob_diff := prob_rep - prob_dem]
cwdbTable[, party_diff := (party_rep - party_dem)/2]

####################
#Merge in treatment#
####################

setkey(cwdbTable, personpk)
setkey(in_veterans_treatment, personpk)
cwdbTable = in_veterans_treatment[cwdbTable]

#Regiment-Year Dummies for company level treatment
cwdbTable[, cov_co := paste(regimentpk, enlist_year,term_sort, sep = ":")]
cwdbTable[, cov_co_alt := paste(regimentpk, enlist_year,term_sort, substitute, drafted, sep = ":")]

#Company clusters
cwdbTable[, cov_co_cl := paste(regimentpk, enlist_year,term_sort, companypk, sep = ":")]

#Calculate Death rates
cwdbTable[, co_death_rate := (company_deaths) / in_company_n]
cwdbTable[, co_kia_rate := (company_kia) / in_company_n]

#####################
#Prepare for anaysis#
#####################

#missing birth year, dummy, set birth_year to mean
cwdbTable[, birth_year_na := is.na(birth_year)]
cwdbTable[(birth_year_na), birth_year := cwdbTable[, birth_year] %>% median(na.rm = T)]

#Define samples
cwdbTable[, sample_match :=  (!matched_1874 | (matched_1874 & match_q_1874))]
cwdbTable[, sample_match_best :=  (!matched_1874 | (matched_1874 & match_q_1874 & best))]
cwdbTable[, sample_cmatch := (matched_census) & (!matched_1874 | (matched_1874 & match_q_1874))]
cwdbTable[, sample_cmatch_best := (matched_census) & (!matched_1874 | (matched_1874 & match_q_1874 & best))]
cwdbTable[, sample_inf := type %in% "Inf"  & !(unitcompany %in% c('U', "Band", "S"))]
cwdbTable[, all := T]

#Define Functions
do_felm = function(y, x, fe = 0, z = 0, cl = 0, data, w = NULL) {
  formula = as.formula(paste(y, " ~ ", x, "|", fe, "|" , z, "|", cl))
  return(felm(formula, data = data, weights = w))
}

get_n = function(x) {
  x %>% unique %>% paste(collapse = ", ")
}

####################
####################
##Indiana Analyses##
####################
####################


##########
#Table B2#
##########

#First: pre-treatment latent partisanship predicts actual partisanship
dvs = paste0("party_", c('rep', 'dem', 'diff'))
ivs = paste0("prob_", c('rep', 'dem', 'diff'))
cl = 'personpk'

#Sample matched to census; matched to 1874
tableData = cwdbTable[(sample_cmatch) & matched_1874] %>%
  .[, wt_1874 := 1 / .N, by = personpk]

m_latent_party_1 = Map(function(y,x) do_felm(y, x, fe = 0, cl = cl, 
                            data = tableData, 
                            w = tableData$wt_1874),
                            dvs,ivs) 
  
#Sample matched to census; matched to 1874 (best)
tableData = cwdbTable[(sample_cmatch_best) & (matched_1874)] %>%
  .[, wt_1874 := 1 / .N, by = personpk]
m_latent_party_2 = Map(function(y,x) do_felm(y, x, fe = 0, cl = cl, 
                                             data = tableData, 
                                             w = tableData$wt_1874),
                                      dvs,ivs) 

  
latent_party_table = stargazer(c(m_latent_party_1, m_latent_party_2),
          keep = ivs,
          dep.var.labels = c("Rep.", "Dem.", "Party Diff.") %>% rep(2),
          covariate.labels = c("Rep. Prob.", "Dem. Prob.", "Diff Prob."),
          column.separate = c(3,3),
          column.labels = c("All Matches", "Best Matches"),
          keep.stat = 'n',
          label = "tab:indiana_latent_prediction",
          title = "Indiana Veterans: Predicting Partisanship with Latent Partisanship",
          type = 'latex', star.cutoffs = c(0.05,0.01,0.001))

note = "Sample includes men serving in Indiana Regiments who were matched to the 1860 Census and 1874 People's Guides. Restricted sample includes only the best 1874 matches. Individuals are weighted by 1 over the number of matches. Latent partisanship is predicted probability of being Republican or Democrat based on the person's name, birth year, and birth place listed in the 1860 census. Standard errors are clustered by individual."
write_latex(latent_party_table, note, './output/appendix/Table_B2.tex')

###########
#Figure B2#
###########

#Does Partisanship Predict Attrition? Bivariate:

dvs = c("died", "deserted", "matched_1874")
ivs = paste0("prob_", c('rep', 'dem', 'diff'))
cl = 'personpk'

tableData = cwdbTable[(sample_cmatch_best) & (sample_inf)] %>%
  .[, wt_1874 := 1 / .N, by = personpk]

a = Map(function(y,x) do_felm(y, x, fe = 0, cl = cl, 
                          data = tableData, 
                          w = tableData$wt_1874),
    rep(dvs, each = length(ivs)), rep(ivs, length(dvs))) %>%
  dwplot(., by_2sd = T,
         vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) %>%
         relabel_predictors(c(prob_rep = "Pr(Rep.)", 
                              prob_dem = 'Pr(Dem.)', 
                              prob_diff = "Pr(Rep.) - Pr(Dem.)")) +
          scale_colour_grey(start = .3, end = .7,
                           name = "Outcome",
                           #breaks = c(0, 1, 2),
                           labels = c("Died", "Deserted", "Matched 1874"),
                           ) +
          theme_bw() +
          theme(legend.position = 'bottom') 

ggsave("./output/appendix/Figure_B2.pdf", a, width = 8.5, height = 5)

#Full table
model_list = Map(function(y,x) do_felm(y, x, fe = 0, cl = cl, 
                                       data = tableData, 
                                       w = tableData$wt_1874),
                 rep(dvs, each = length(ivs)), rep(ivs, length(dvs)))

names(model_list) = c("Died", "Deserted", "Matched to 1874") %>% rep(each = 3)
note = "Sample includes men serving in Indiana Regiments who were matched to the 1860 Census. Matches only include 'best' post-war links for each individual. Individuals are weighted by 1 over the number of matches. Latent partisanship is predicted probability of being Republican or Democrat based on the person's name, birth year, and birth place listed in the 1860 census. Standard errors are clustered by individual."


table_out = modelsummary(model_list, stars = T, 
                         coef_rename = c("prob_rep" = "Pr(Rep.)", 'prob_dem' = "Pr(Dem.)", 'prob_diff' = 'Pr(Rep.) - Pr(Dem.)'), 
                         notes = note, 
                         title = "Differential Attrition by Predict Partisanship (Bivariate)")
table_out = data.table(table = table_out, name = "Figure B2")
table_list = c(table_list, list(table_out))

############
#Figure B15#
############

#Does Partisanship Predict Attrition? Within Regiments:

dvs = c("died", "deserted", "matched_1874")
ivs = paste0("prob_", c('rep', 'dem', 'diff'))
cl = 'personpk'

tableData = cwdbTable[(sample_cmatch_best) & (sample_inf)] %>%
  .[, wt_1874 := 1 / .N, by = personpk]

b = Map(function(y,x) do_felm(y, x, fe = 'cov_co', cl = 'cov_co_cl', 
                              data = tableData, 
                              w = tableData$wt_1874),
        rep(dvs, each = length(ivs)), rep(ivs, length(dvs))) %>%
  dwplot(., by_2sd = T,
         vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) %>%
  relabel_predictors(c(prob_rep = "Pr(Rep.)", 
                       prob_dem = 'Pr(Dem.)', 
                       prob_diff = "Pr(Rep.) - Pr(Dem.)")) +
  scale_colour_grey(start = .3, end = .7,
                    name = "Outcome",
                    #breaks = c(0, 1, 2),
                    labels = c("Died", "Deserted", "Matched 1874"),
  ) +
  theme_bw() +
  theme(legend.position = 'bottom') 


ggsave("./output/appendix/Figure_B15.pdf", b, width = 8.5, height = 5)

#Full table
model_list = Map(function(y,x) do_felm(y, x, fe = 'cov_co', cl = 'cov_co_cl', 
                                       data = tableData, 
                                       w = tableData$wt_1874),
                 rep(dvs, each = length(ivs)), rep(ivs, length(dvs)))

note = "Sample includes men serving in Indiana Regiments who were matched to the 1860 Census. Matches only include 'best' post-war links for each individual. Individuals are weighted by 1 over the number of matches. Standard errors are clustered by company."

rows = c("Regiment FE", rep("Yes", 9)) %>% rbind %>% data.frame
attr(rows, 'position') = 7 
names(model_list) = c("Died", "Deserted", "Matched in 1874") %>% rep(each = 3)
table_out = modelsummary(model_list, stars = T, 
                         add_rows = rows,
                         coef_rename = c(prob_rep = "Pr(Rep.)", 
                                         prob_dem = 'Pr(Dem.)', 
                                         prob_diff = "Pr(Rep.) - Pr(Dem.)"),
                         notes = note,
                         title = 'Differential Attrition by Predicted Partisanship (Within Regiment)')
table_out = data.table(table = table_out, name = "Figure B15")
table_list = c(table_list, list(table_out))

###########
#Figure B3#
###########

#Does treatment predict average company pre-treatment "latent" partisanship

dvs = paste0("prob_", c('rep', 'dem', 'diff'))
ivs = c('company_kia')
fe = c('cov_co')
cl = c('cov_co_cl')

specs = data.table(dvs = rep(dvs, length(ivs)), ivs, fe, cl)

tableData = cwdbTable[(sample_cmatch_best) & sample_inf] %>%
            .[, wt_1874 := 1 / .N, by = personpk]
n_i = tableData[!is.na(company_kia) & !is.na(prob_rep), 
                personpk %>% unique %>% length]

#Company-level partisanship not related to company-level treatment:
tableData = tableData[!is.na(company_kia) & !is.na(prob_rep)] %>%
  .[, list(
           company_kia = weighted.mean(company_kia, wt_1874),
           co_rep = weighted.mean(prob_rep, wt_1874),
           co_dem = weighted.mean(prob_dem, wt_1874),
           n = sum(wt_1874),
           party_diff = weighted.mean(party_diff, wt_1874),
           enlist_method = weighted.mean(substitute + drafted, wt_1874)
  ), by = list(cov_co, cov_co_cl)] 
tableData[, co_diff := co_rep - co_dem]

n_co = tableData$cov_co_cl %>% unique %>% length 
n_r = tableData$cov_co %>% unique %>% length
  
model_list = list(
  felm(company_kia ~ co_rep | cov_co | 0 | cov_co_cl, 
       data = tableData, weights = tableData$n),
  felm(company_kia ~ co_dem | cov_co | 0 | cov_co_cl, 
       data = tableData, weights = tableData$n),
  felm(company_kia ~ co_diff | cov_co | 0 | cov_co_cl, 
       data = tableData, weights = tableData$n)
)

p = model_list %>%
  dwplot(., dodge_size = 0, by_2sd = T,
         vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) %>%
  relabel_predictors(c(co_rep = "Pr(Rep.)", 
                       co_dem = 'Pr(Dem.)', 
                       co_diff = "Pr(Rep.) - Pr(Dem.)")) +
  scale_colour_grey(start = 0, end = 0,
                    name = "Outcome",
                    #breaks = c(0, 1, 2),
                    labels = c("Company KIA"),
  ) +
  xlab('Effect on Company KIA') + 
  theme_bw() +
  theme(legend.position = 'none') 

ggsave("./output/appendix/Figure_B3.pdf", p, width = 8.5, height = 5)


note = paste("Least squares regression of mean company casualties, by company, on mean company partisanship, conditional regiment-enlistment year fixed effects. Coefficients are not standardized. Sample includes serving in Indiana Regiments who were matched to the 1860 Census:", n_i, " unique soldiers, in " , n_co, " companies, across ", n_r, ' regiments. Companies are weighted by the number of soldiers. Standard errors HC robust.', sep = "")


rows = c("Regiment FE", rep("Yes", length(model_list))) %>% rbind %>% data.frame
attr(rows, 'position') = 7 
table_out = modelsummary(model_list, stars = T, 
                         add_rows = rows,
                         notes = note,
                         title = "Within-Regiment Relationship between mean Company Partisanship and mean Company KIAs",
                         coef_rename = c(co_rep = "Co. Mean Pr(Rep.)", 
                                       co_dem = 'Co. Mean Pr(Dem.)', 
                                       co_diff = "Co. Mean Pr(Rep.) - Pr(Dem.)"))
table_out = data.table(table = table_out, name = "Figure B3")
table_list = c(table_list, list(table_out))


############
#Figure B16#
############

#Does treatment predict attrition? 

dvs = c("deserted", "died", "matched_1874")
ivs = c('company_kia')
fe = c('cov_co')
cl = c('cov_co_cl')

specs = data.table(dvs = rep(dvs, each = length(ivs)), ivs, fe, cl)
setkey(specs, ivs)

tableData = cwdbTable[(sample_cmatch_best) & sample_inf] %>%
  .[, wt_1874 := 1 / .N, by = personpk]

n_i = tableData[!is.na(company_kia) & !is.na(died), personpk] %>% unique %>% length
n_co = tableData[!is.na(company_kia) & !is.na(died), cov_co_cl] %>% unique %>% length
n_r = tableData[!is.na(company_kia) & !is.na(died), cov_co] %>% unique %>% length

scaleFUN <- function(x) sprintf("%.3f", x)

model_list = Map(function(y,x,f,c) do_felm(y, x, fe = f, cl = c, 
                                           data = tableData, 
                                           w = tableData$wt_1874),
                 specs$dvs, specs$ivs, specs$fe, specs$cl)

p = model_list %>% 
  dwplot(., by_2sd = T,
         vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2))  %>%
  relabel_predictors(c(co_kia_rate = "Company KIA Rate", 
                       company_kia = 'Company KIAs')) +
  scale_colour_grey(start = .3, end = .7,
                    name = "Outcome",
                    #breaks = c(0, 1, 2),
                    labels = c('Deserted', "Died", "Matched 1874"),
  ) +
  scale_x_continuous(labels = scaleFUN) +
  theme_bw() +
  xlab("Coefficient Estimate") +
  theme(legend.position = 'bottom') 

ggsave("./output/appendix/Figure_B16.pdf", p, height = 5, width = 8.5)


note = paste("Regression of outcomes (matched in 1874, died, deserted) on company KIAs, conditioning on regiment fixed effects. Coefficients are not standardized. Sample includes men serving in Indiana Infantry Regiments who were matched to the 1860 Census: ", n_i, " unique soldiers, in " , n_co, " companies, across ", n_r, ' regiments. Individuals are weighted by 1 over the number of census matches. Standard errors are clustered by company.', sep = "")

rows = c("Regiment FE", rep("Yes", length(model_list))) %>% rbind %>% data.frame
attr(rows, 'position') = 3

names(model_list) = c("Deserted", "Died", "Matched in 1874")
table_out = modelsummary(model_list, stars = T, 
                         add_rows = rows,
                         notes = note,
                         title = "Differential Attrition by Treatment (Within Regiment)",
                         coef_rename = c(company_kia = "Company KIA"))
table_out = data.table(table = table_out, name = "Figure B16")
table_list = c(table_list, list(table_out))


############
#Figure B17#
############

#Does match rate vary across latent partisanship and across treatment?

scaleFUN <- function(x) sprintf("%.3f", x)

inter_use = cwdbTable[ (sample_cmatch_best) & sample_inf] %>%
  .[, wt_1874 := 1 / .N, by = personpk] 
inter_use[, matched_1874_1 := 1*matched_1874]
inter_use[, complete := !is.na(matched_1874) & !is.na(prob_rep) & !is.na(company_kia) & !is.na(cov_co)]

n_i = inter_use[(complete), personpk] %>% unique %>% length
n_co = inter_use[(complete), cov_co_cl] %>% unique %>% length
n_r = inter_use[(complete), cov_co] %>% unique %>% length


a = inter.binning(Y = "matched_1874",
              D = 'company_kia',
              X = 'prob_rep', 
              FE = 'cov_co', 
              weights = 'wt_1874',
              data = as.data.frame(inter_use[(complete)]),
              na.rm = T, 
              cl = 'cov_co_cl',
              theme.bw = T, 
              Ylabel = "Matched",
              Dlabel = "Company KIAs",
              Xlabel = "\nPr(R)",
              ylim = c(-0.006,0.008)
              )
a$graph$layers[c(3,4)] = NULL
a$graph = a$graph + scale_y_continuous(labels = scaleFUN)

b = inter.binning(Y = "matched_1874",
                  D = 'company_kia',
                  X = 'prob_dem', 
                  FE = 'cov_co', 
                  weights = 'wt_1874',
                  data = as.data.frame(inter_use[(complete)]),
                  na.rm = T, 
                  cl = 'cov_co_cl',
                  theme.bw = T,
                  Xlabel = "\nPr(D)",
                  Dlabel = "Company KIAs",
                  Ylabel = "Matched",
                  ylim = c(-0.006,0.008)
)
b$graph$layers[c(3,4)] = NULL
b$graph$labels$y = NULL
b$graph = b$graph + theme(axis.text.y = element_blank(),
                          axis.ticks.y = element_blank(),
                          axis.title.y = element_blank() )

c = inter.binning(Y = "matched_1874",
                  D = 'company_kia',
                  X = 'prob_diff', 
                  FE = 'cov_co', 
                  weights = 'wt_1874',
                  data = as.data.frame(inter_use[(complete)]),
                  na.rm = T, 
                  cl = 'cov_co_cl',
                  theme.bw = T,
                  Xlabel = "\nPr(R) - Pr(D)",
                  Dlabel = "Company KIAs",
                  Ylabel = "Matched",
                  ylim = c(-0.006,0.008)
)
c$graph$layers[c(3,4)] = NULL
c$graph$labels$y = NULL
c$graph = c$graph + theme(axis.text.y = element_blank(),
                          axis.ticks.y = element_blank(),
                          axis.title.y = element_blank() ) 

p = plot_grid(a$graph, b$graph, c$graph, nrow = 1, align = 'hv', labels = 'auto')

ggsave("./output/appendix/Figure_B17.pdf", p, width = 8.5, height = 6)


out_table = rbind(a$est.bin, b$est.bin, c$est.bin) %>% .[, -1] %>% as.data.frame() %>% round(3)
names(out_table)[1] = "Company KIA"

note = paste("Marginal effect of Company KIAs on post-war matching, across bins of predicted partisanship probability, conditional on regiment fixed effects. Sample includes men serving in Indiana Infantry Regiments who were matched to the 1860 Census: ", n_i, " unique soldiers, in " , n_co, " companies, across ", n_r, ' regiments. Individuals are weighted by 1 over the number of census matches. Standard errors are clustered by company.', sep = "")

table_out = kable(out_table,
                  caption = "Differential Attrition by Treatment (Within Regiment) and Partisanship") %>% 
  pack_rows(index = c("Moderator: Pr(R)" = 3, "Moderator: Pr(D)" = 3, "Moderator: Pr(R) - Pr(D)" = 3)) %>%
  kable_styling() %>%
  add_footnote(note)

table_out = data.table(table = table_out, name = "Figure B17")
table_list = c(table_list, list(table_out))


#############
#Figure B11##
#############

#Is treatment balanced on pre-enlistment covariates?

bal_use = cwdbTable[sample_cmatch_best & sample_inf]

#Merge in census covariates
setkey(bal_use, personpk, census_county)
setkey(census_balance, personpk, census_county)
bal_use = census_balance[bal_use]
bal_use = bal_use[!is.na(school)]
bal_use[, wt_1874 := 1/.N , by = personpk]

#Construct age variables
bal_use[, enlist_age := as.numeric(enlist_year) - birth_year]
bal_use[, c_enlist_age := as.numeric(enlist_year) - c_birth_year]
bal_use[, indate := as.Date(indate)]

#Balance Covariates
dvs = c("prob_dem", "prob_rep", "prob_diff",
        'drafted', 'substitute',
        'enlist_age', 'c_enlist_age' , 'married',
        'school', 'illiterate', 'hh_head', 'hh_kids', 'hh_size',
        'l_hh_real_estate', 'l_hh_personal_estate', 'owns_property',
        'hh_ma_males'
        )

#Define model specifications
ivs = c('company_kia')
fe = c('cov_co')
cl = c('cov_co_cl')
specs = data.table(dvs = rep(dvs, length(ivs)), ivs, fe, cl)

within_sd = demeanlist(bal_use[!is.na(company_kia), list(company_kia)], 
           bal_use[!is.na(company_kia), list(as.factor(cov_co))],
           weights = bal_use[!is.na(company_kia), wt_1874]) %>% 
  unlist %>% sd


n_i = bal_use[!is.na(company_kia), personpk] %>% unique %>% length
n_co = bal_use[!is.na(company_kia), cov_co_cl] %>% unique %>% length
n_r = bal_use[!is.na(company_kia), cov_co] %>% unique %>% length


model_list = Map(function(y,x,f,c) do_felm(y, x, fe = f, cl = c, 
                                           data = bal_use[], 
                                           w = bal_use[,wt_1874]),
                 specs$ivs, specs$dvs, specs$fe, specs$cl)

p = model_list %>%
  dwplot(., by_2sd = T,
         vline = geom_vline(xintercept = c(0,within_sd/10, within_sd/-10), colour = "grey60", linetype = c(1,2,2)))  %>%
  relabel_predictors(c(prob_dem = "Pr(Dem)", 
                       prob_rep = "Pr(Rep)",
                       prob_diff = "Pr(Rep) - Pr(Dem)",
                       draftedTRUE = "Drafted",
                       substituteTRUE = "Substitute",
                       enlist_age = "Enlist Age (M)",
                       c_enlist_age = "Enlist Age (C)",
                       schoolTRUE = "In School",
                       illiterateTRUE = "Illiterate",
                       hh_headTRUE = "HH Head",
                       hh_kids = "HH Num. of Children",
                       hh_size = 'HH Size',
                       l_hh_real_estate = "Log HH Real Estate",
                       l_hh_personal_estate = "Log HH Personal Estate",
                       owns_propertyTRUE = "HH Owns Property",
                       marriedTRUE = "Married",
                       hh_ma_males = "HH Num. Mil. Age Males")) +
  theme_bw() + theme(legend.position = 'none') + xlab("Effect on Company KIA")

ggsave("./output/appendix/Figure_B11.pdf", p, width = 8.5, height = 11)




note = paste("Regression of Company KIA on pre-muster covariates, conditioning on regiment fixed effects. Coefficients are not standardized. Sample includes men serving in Indiana Infantry Regiments who were matched to the 1860 Census: ", n_i, " unique soldiers, in " , n_co, " companies, across ", n_r, ' regiments. Individuals are weighted by 1 over the number of census matches. Standard errors are clustered by company.', sep = "")

rows = c("Regiment FE", rep("Yes", length(model_list))) %>% rbind %>% data.frame
attr(rows, 'position') = 35
c_names = c(prob_dem = "Pr(Dem)", 
            prob_rep = "Pr(Rep)",
            prob_diff = "Pr(Rep) - Pr(Dem)",
            draftedTRUE = "Drafted",
            substituteTRUE = "Substitute",
            enlist_age = "Enlist Age (M)",
            c_enlist_age = "Enlist Age (C)",
            schoolTRUE = "In School",
            illiterateTRUE = "Illiterate",
            hh_headTRUE = "HH Head",
            hh_kids = "HH Num. of Children",
            hh_size = 'HH Size',
            l_hh_real_estate = "Log HH Real Estate",
            l_hh_personal_estate = "Log HH Personal Estate",
            owns_propertyTRUE = "HH Owns Property",
            marriedTRUE = "Married",
            hh_ma_males = "HH Num. Mil. Age Males")

names(model_list) = "Company KIA" %>% rep(length(c_names))
table_out = modelsummary(model_list, stars = T, 
                         add_rows = rows,
                         notes = note,
                         coef_rename = c_names,
                         title = "Balance on pre-war attributes across levels of treatment")
table_out = data.table(table = table_out, name = "Figure B11")
table_list = c(table_list, list(table_out))

############
#Figure B18#
############

#Balance: Attrition

#weighted ks test
pkstwo <- function(x, tol = 1e-06) {
  if (is.numeric(x)) 
    x <- as.double(x)
  else stop("argument 'x' must be numeric")
  p <- rep(0, length(x))
  p[is.na(x)] <- NA
  IND <- which(!is.na(x) & (x > 0))
  if (length(IND)) 
    p[IND] <- .Call(stats:::C_pKS2, p = x[IND], tol)
  p
}

ks <- function(x,z,w = NULL)
{
  if (is.null(w)) {w = rep(1, length(z))}
  n0 = sum(w[z==0])
  n1 = sum(w[z==1])
  w[z==1] <-  w[z==1]/sum(w[z==1])
  w[z==0] <- -w[z==0]/sum(w[z==0])
  ind <- order(x)
  cumv <- abs(cumsum(w[ind]))
  cumv <- cumv[diff(x[ind]) != 0]
  d = (ifelse(length(cumv) > 0, max(cumv), 0))
  p = 1- pkstwo(sqrt((n0 * n1)/(n0 + n1)) * d)
  return(p)
}

#Balance Covariates
dvs = c("prob_dem", "prob_rep", "prob_diff",
        'drafted', 'substitute',
        'enlist_age', 'c_enlist_age' , 'married',
        'school', 'illiterate', 'hh_head', 'hh_kids', 'hh_size',
        'l_hh_real_estate', 'l_hh_personal_estate', 'owns_property',
        'hh_ma_males'
)

#Define model specifications
ivs = c('company_kia')

#raw
bal_vars = c(ivs, dvs, "personpk", 'wt_1874', "matched_1874", "cov_co")
plot_data_1 = bal_use[!(matched_1874), .SD, .SDcols = bal_vars]
plot_data_2 = bal_use[(matched_1874), .SD, .SDcols = bal_vars]
plot_data_1[, type := "unmatched"]
plot_data_2[, type := "matched"]


plot_data = rbind(
  melt.data.table(plot_data_1, 
                id.vars = c("personpk", 'wt_1874', "matched_1874", "cov_co", "type")),
  melt.data.table(plot_data_2, 
                id.vars = c("personpk", 'wt_1874', "matched_1874", "cov_co", "type"))
)


add_stat = plot_data[, {if (table(value) %>% length %>% `>` (2)) {
                idx = !is.na(value)
                p = ks(value[idx], type[idx] %in% "matched", wt_1874[idx])
                test = 'ks'
              } else {
                p = lm(value ~ type, data = .SD, weights = wt_1874) %>%
                      summary %>% .$coefficients %>% .[2,4]
                test = 't'
              }
             list(p = p,
                  test = test)
             }, by = variable]


add_stat[, x := 0]
add_stat[, y := c(0.1,.5,.5, .5, 10, 10, .05, .05, 10, 10, 10, 10, .1, .05, .5, .5, 10, .1)]
add_stat[, p.format := paste("p =", sprintf( "%0.3f", p))]

p = ggplot(plot_data, aes(x=value, fill=type, color=type,  y = ..density.., weight = wt_1874)) +
  geom_histogram(position="identity", alpha = 0.5) +
  facet_wrap(~variable, nrow = 6, ncol = 3, scales = 'free') + 
  theme_bw() + 
  theme(legend.position = 'bottom') +
  geom_text(data = add_stat,
            mapping = aes(x = x, y = y, label = p.format),
            inherit.aes = F, hjust = 'inward', vjust = 'inward') + 
  theme(axis.ticks.y = element_blank(), axis.text.y=element_blank(), 
        axis.title.y = element_blank())

ggsave("./output/appendix/Figure_B18.pdf", p, width = 8.5, height = 11)

############
#Figure B19#
############

#regiment centered balance on missingness

bal_vars = c(ivs, dvs, "personpk", 'wt_1874', "matched_1874", "cov_co")
plot_data_1 = bal_use[, .SD, .SDcols = bal_vars]
plot_data_1[, c(ivs,dvs) := lapply(.SD, as.double), .SDcols = c(ivs,dvs)]
plot_data_1[, c(ivs,dvs) := lapply(.SD, function(x) x - mean(x, na.rm = T)), .SDcols = c(ivs,dvs), by = cov_co]
plot_data_2 = plot_data_1[!(matched_1874)]
plot_data_1 = plot_data_1[(matched_1874)]
plot_data_1[, type := "unmatched"]
plot_data_2[, type := "matched"]
plot_data = rbind(
  melt.data.table(plot_data_1, 
                  id.vars = c("personpk", 'wt_1874', "matched_1874", "cov_co", "type")),
  melt.data.table(plot_data_2, 
                  id.vars = c("personpk", 'wt_1874', "matched_1874", "cov_co", "type"))
)

add_stat = plot_data[, {if (table(value) %>% length %>% `>` (2)) {
  idx = !is.na(value)
  p = ks(value[idx], type[idx] %in% "matched", wt_1874[idx])
  #p = ks.test(value[idx][type[idx] %in% "all"], value[idx][type[idx] %in% "matched"])$statistic
  test = 'ks'
} else {
  p = lm(value ~ type, data = .SD, weights = wt_1874) %>%
    summary %>% .$coefficients %>% .[2,4]
  test = 't'
}
  list(p = p,
       test = test)
}, by = variable]

add_stat[, x := 0]
add_stat[, y := c(0.5,1,1, .5, 10, 10, .05, .05, 2, 2, 5, 1, .1, .05, .1, .2, 3, .1)]
add_stat[, p.format := paste("p =", sprintf( "%0.3f", p))]


scaleFUN <- function(x) sprintf("%.2f", x)

p = ggplot(plot_data, aes(x=value, fill=type, color=type,  y = ..density.., weight = wt_1874)) +
  geom_histogram(position="identity", alpha = 0.5) +
  facet_wrap(~variable, nrow = 6, ncol = 3, scales = 'free') + 
  theme_bw() + 
  theme(legend.position = 'bottom') +
  geom_text(data = add_stat,
            mapping = aes(x = x, y = y, label = p.format),
            inherit.aes = F, hjust = 'inward', vjust = 'inward') + 
  theme(axis.ticks.y = element_blank(), axis.text.y=element_blank(), 
        axis.title.y = element_blank()) +
  scale_x_continuous(labels = scaleFUN)

ggsave("./output/appendix/Figure_B19.pdf", p, width = 8.5, height = 11)

###############
#MIPO Analysis#
###############

#Get complete cases
bal_use[, indate := as.Date(indate)]
bal_use[, orgdate := as.Date(orgdate)]
bal_use = bal_use[!is.na(c_enlist_age) & !is.na(company_kia)]

#Get balance variables
X = model.matrix(~ -1 + company_kia + company_deaths  + co_kia_rate + co_death_rate + in_company_n + indate + orgdate + enlist_rank_clean + birth_year_na + prob_dem + prob_rep + prob_2 + drafted + substitute + I(1860 - c_birth_year) + school + illiterate + hh_head + hh_kids + l_hh_real_estate + l_hh_personal_estate + owns_property + married + hh_size + hh_ma_males + census_county + birth_place_group, data = bal_use)
#Center balance variables within regiment-enlistment year groups
X.c = demeanlist(X, list(bal_use[, as.factor(cov_co)]), weights = bal_use$wt_1874)
#Mean balance variables within regiment-enlistment year groups
X.m = demeanlist(X, list(bal_use[, as.factor(cov_co)]), weights = bal_use$wt_1874, means = T)

#Interactions between treatment variable
#and covariates
X.c.i = X.c[,1] * X.c[,-1]
X.m.i = X.m[,1] * X.m[,-1]
X.i = X[,1] * X[,-1]

#Squares of all variables
X.c.2 = X.c^2
X.2 = scale(X.i, scale = F)^2
X.m.2 = scale(X.m, scale = F)^2

#Calculate IPW using Random Forest: centered and mean covariates
#because RF are nonlinear, nonadditive, no need to balance on interactions, squares
rf = regression_forest(X = cbind(X.c, X.m), 
                       Y = bal_use$matched_1874, 
                       tune.parameters = 'all', 
                       num.trees = 4000,
                       sample.weights = bal_use$wt_1874)
bal_use[, ipw_rf := 1/predict(rf)[,1] * wt_1874]

############
#Figure B20#
############

#Calculate IPW using CBPS: centered and mean covariates
cbps1 = CBPS(as.factor(bal_use$matched_1874) ~  X.c + X.m + X.c.i + X.m.i + X.c.2 + X.m.2, 
             sample.weights = bal_use$wt_1874, 
             ATT = 0)

pdf(file = "./output/appendix/Figure_B20.pdf", width = 6, height = 8)
plot(cbps1, boxplot = T)
dev.off()

bal_use[, ipw_cbps1 := cbps1$weights]

############
#Figure B21#
############

#Calculate IPW using CBPS: centered covariates only
cbps2 = CBPS(as.factor(bal_use$matched_1874) ~  X.c + X.c.i + X.c.2, 
             sample.weights = bal_use$wt_1874,
             ATT = 0)

pdf(file = "./output/appendix/Figure_B21.pdf", width = 6, height = 8)
plot(cbps2, boxplot = T)
dev.off()

bal_use[, ipw_cbps2 := cbps2$weights]

############
#Figure B22#
############

# MIPO
ivs = c("company_kia")
dvs = c('party_rep', 'party_dem', 'party_diff')
fe = 'cov_co'
cl = 'cov_co_cl'

specs = data.table(dvs = rep(dvs, length(ivs)), ivs, fe, cl)


n_i = bal_use[!is.na(company_kia) & (matched_1874), personpk] %>% unique %>% length
n_co = bal_use[!is.na(company_kia) & (matched_1874), cov_co_cl] %>% unique %>% length
n_r = bal_use[!is.na(company_kia) & (matched_1874), cov_co] %>% unique %>% length


model_orig = Map(function(y,x,f,c) do_felm(y, x, fe = f, cl = c, 
                              data = bal_use[(matched_1874)], 
                              w = bal_use[(matched_1874),wt_1874]),
    specs$dvs, specs$ivs, specs$fe, specs$cl) 

w_orig = model_orig %>%
    lapply(tidy) %>% 
    rbindlist %>%
    .[, model := 'Baseline']

model_rf = Map(function(y,x,f,c) do_felm(y, x, fe = f, cl = c, 
                                       data = bal_use[(matched_1874)], 
                                       w = bal_use[(matched_1874),ipw_rf]),
             specs$dvs, specs$ivs, specs$fe, specs$cl) 

w_rf = model_rf %>%
  lapply(tidy) %>% 
  rbindlist %>%
  .[, model := 'RF']

model_cbps1 = Map(function(y,x,f,c) do_felm(y, x, fe = f, cl = c, 
                                     data = bal_use[(matched_1874)], 
                                     w = bal_use[(matched_1874),ipw_cbps1]),
           specs$dvs, specs$ivs, specs$fe, specs$cl) 

w_cbps1 = model_cbps1 %>%
  lapply(tidy) %>% 
  rbindlist %>%
  .[, model := 'CBPS 1']

model_cbps2 = Map(function(y,x,f,c) do_felm(y, x, fe = f, cl = c, 
                                        data = bal_use[(matched_1874)], 
                                        w = bal_use[(matched_1874),ipw_cbps2]),
              specs$dvs, specs$ivs, specs$fe, specs$cl) 

w_cbps2 = model_cbps2 %>%
          lapply(tidy) %>% 
          rbindlist %>%
          .[, model := 'CBPS 2']

plot_data = rbind(w_orig, w_rf, w_cbps1, w_cbps2)
plot_data$term = rep(dvs, 4)

p = dwplot(plot_data, model_name= 'model',
       by_2sd = F,
       vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) %>%
  relabel_predictors(c(party_rep = "Republican", 
                       party_dem = "Democrat",
                       party_diff = "Party Difference"
                       )) +
  theme_bw() + 
  scale_colour_grey(start = .3, end = .7,
                    name = "Weighting") +
  theme(legend.position = 'bottom') + xlab("Effect of Company KIA")

ggsave("./output/appendix/Figure_B22.pdf", p, width = 6, height = 5)


model_list = c(model_orig, model_rf, model_cbps1, model_cbps2)
names(model_list) = c("Republican", "Democrat", "Partisan Difference") %>% rep(4)
note = paste("Regression of post-war partisanship on Company KIA, conditioning on regiment fixed effects, using different weights for MIPO. Sample includes men serving in Indiana Infantry Regiments who were matched to the 1860 Census and the 1874 People's Guides: ", n_i, " unique soldiers, in " , n_co, " companies, across ", n_r, ' regiments. Individuals are weighted by 1 over the number of matches multiplied by their inverse probability weights of being matched post-war. Standard errors are clustered by company.', sep = "")

rows = c("Regiment FE", rep("Yes", length(model_list))) %>% rbind %>% data.frame
attr(rows, 'position') = 3
table_out = modelsummary(model_list, stars = T, 
                         add_rows = rows,
                         coef_rename = c(company_kia = "Company KIA"),
                         notes = note,
                         title = "Robustness of Effects of Company Casualties to Weighting to address Attrition")  %>%
    add_header_above(c(" " = 1, "Baseline" = 3, "RF IPW" = 3, "CBPS 1 IPW" = 3, "CBPS 2 IPW" = 3))
table_out = data.table(table = table_out, name = "Figure B22")
table_list = c(table_list, list(table_out))

##################################################
#Effects of Casualties on Republican Partisanship#
##################################################

#four samples#
samples = c("sample_cmatch", "sample_cmatch_best", "sample_match", 'sample_match_best')
#two approaches to attrition
attrition = c("all", "matched_1874")
#two treatments
ivs = c('company_kia', 'co_kia_rate')
#3 dvs
dvs = c('party_rep', 'party_dem', 'party_diff')
#3 sets of covariates
covs = c('no_covars', 'army_covars', 'all_covars')

#regiment fe, cluster, person weights
fe = 'cov_co'
cl = 'cov_co_cl'
wt = 'wt_1874'

# covariates: none, army, full
no_covars = NULL
army_covars = c('in_company_n', "indate", "enlist_rank_clean", 'birth_year', 'birth_year_na', "drafted", 'substitute', "joined_at_org", "census_county")
census_covars = c('prob_dem', "prob_rep", "prob_2",  'school', 'illiterate', 'hh_head', 'hh_kids', 'l_hh_real_estate', 'l_hh_personal_estate', 'owns_property',  'married',  'hh_size', 'hh_ma_males', "birth_place_group")
all_covars = c(army_covars, census_covars)

#Make specifications list
specs = expand.grid(samples, attrition, covs, ivs, dvs, stringsAsFactors = F) %>%
  as.data.table %>%
  setnames(., c('samples', 'attrition', 'covs', 'ivs', 'dvs')) %>%
  setorder(., samples, attrition,  -covs, ivs)
#Places to save results, attributes
specs[, model := vector(mode = 'list', length = nrow(specs))]
specs[, n_i := as.integer(NA)]
specs[, n_co := as.integer(NA)]
specs[, n_reg := as.integer(NA)]
specs[, sd_t := as.double(NA)]

#Loop through specifications, estimate models
for (s in unique(specs$samples)) {
    #get sample
    tempTable = cwdbTable[get(s) & sample_inf] %>% 
      .[, wt_1874 := 1/.N, by = personpk]
    #Merge in census data, if needed
    c_use = str_detect(s, 'cmatch')
    if (c_use) {
      setkey(tempTable, personpk, census_county)
      setkey(census_balance, personpk, census_county)
      tempTable = census_balance[tempTable] %>%
                  .[!is.na(school)]
    }
    #by attrition status
    for (a in unique(specs$attrition)) {
      tempUse = tempTable[(get(a))] 
      #by covariate set; drop census covars if needed
      covs = if (c_use) unique(specs$covs) else unique(specs$covs)[-3]
      for (c in covs) {
        #get analysis data
        use_vars = c(get(c), ivs, dvs, fe, cl, wt)
        idx = tempUse[, .SD, .SDcols = use_vars] %>% complete.cases
        estTable = tempUse[idx] #%>% 
                    #.[, wt_1874 := 1/.N, by = personpk]
        #get individuals; companies; regiments
        idx = specs[, samples %in% s &
                attrition %in% a &
                covs %in% c]
        specs$n_i[idx] = estTable[, personpk] %>% unique %>% length
        specs$n_co[idx] = estTable[, cov_co_cl] %>% unique %>% length
        specs$n_reg[idx] = estTable[, cov_co] %>% unique %>% length
        #Loop over IVs
        for (t in unique(specs$ivs)) {
          iv_temp = c(t, get(c)) %>% paste(collapse = " + ")
          #get within regiment treatment variance
          idx = specs[, samples %in% s &
                        attrition %in% a &
                        covs %in% c & 
                        ivs %in% t]
          specs$sd_t[idx] = estTable[, get(t) - mean(get(t)) , by = cov_co] %>% .$V1 %>% sd
          #estimate and save model
          est = Map(function(y) do_felm(y, x = iv_temp, fe = fe, cl = cl, 
                                        data = estTable, 
                                        w = estTable[,get(wt)]),
                                        unique(specs$dvs))
          specs[samples %in% s &
                  attrition %in% a &
                  covs %in% c & 
                  ivs %in% t, model := est
                ]
        }
      }
    }
}


#effects table 2: CENSUS and BEST MATCH and COUNT
#################################################
tableModels = specs[samples %in% "sample_cmatch_best" &
      attrition %in% "matched_1874" &
      ivs %in% "company_kia"]

out_table = stargazer(tableModels[, model],
           keep = c('company_kia'),
           dep.var.labels = c("Rep.", "Dem.", "Party Diff.") %>% rep(3),
           covariate.labels = c('Company KIA'),
           column.separate = c(3,3,3),
           column.labels = c("Baseline", "Army Controls", "All Controls"),
           keep.stat = 'n',
           label = "tab:indiana_effects_main_census_best_count",
           add.lines = list(c("Regiment FE", rep("Y", 9)),
                            c("Army Controls", rep(c("N", "Y"), times = c(3,6))),
                            c("Census Controls", rep(c("N", "Y"), times = c(6,3)))
                            ),
           title = "Effect of Company Casualties on Post-war Partisanship (Census-Linked, Best Matches)",
           type = 'latex', star.cutoffs = c(0.05,0.01,0.001),
           digits = NA,
           float.env	= "sidewaystable") %>% digits(., digits = 3)




note = paste0("Sample includes men serving in Indiana Regiments who were matched to the 1860 Census and their best match, if any, in the 1874 People's Guides. ",
              "Baseline and control models, respectively, include data on ", get_n(tableModels$n_i), " individual soldiers, serving in ", get_n(tableModels$n_co), " companies, across ", get_n(tableModels$n_reg), " regiments. ",
              "Regiment fixed effects includes a dummy for each group of soldiers who joined a regiment in the same year. ",
              "Observations are weighted by 1 over the number of post-war matches for each unique soldier.",
              "Standard errors are clustered by company.",
              collapse = "")

write_latex(out_table, note, './output/paper/Table_2.tex')



rename_explanator <- function(old_names) {
  new_names <- gsub("enlist_rank_clean", "Rank: ", old_names)
  new_names <- gsub('census_county', "Residence County: ", new_names)
  new_names <- gsub('birth_place_group', " Birth Place: ", new_names)
  change_covars = c('company_kia', 'in_company_n', "indate", 'birth_year', 'birth_year_naTRUE', "draftedTRUE", 'substituteTRUE', "joined_at_orgTRUE", 'prob_dem', "prob_rep", "prob_2",  'schoolTRUE', 'illiterateTRUE', 'hh_headTRUE', 'hh_kids', 'l_hh_real_estate', 'l_hh_personal_estate', 'owns_propertyTRUE',  'marriedTRUE',  'hh_size', 'hh_ma_males')
  change_new = c('Company KIA', 'Company Size', 'Muster In Date', "Birth Year", "Birth Year Missing", "Drafted", "Substitute", "Joined Unit at Formation",
                 "Pr(Democrat)", "Pr(Republican)", "Pr(Other)", "Attended School (1860)", "Illiterate (1860)", "HH Head (1860)", "No. Children in HH (1860)", 
                 "ln HH Real Estate Value (1860)", "ln HH Personal Estate Value (1860)", "Owns Property (1860)", "Married (1860)", "HH Size (1860)", "HH Military-Age Males (1860)")
  for (v in seq_along(change_covars)) {
    new_names[new_names %in% change_covars[v]] = change_new[v]
  }
  setNames(new_names, old_names)
}

rows = rbind(c("Regiment FE", rep("Y", 9)),
             c("Army Controls", rep(c("N", "Y"), times = c(3,6))),
             c("Census Controls", rep(c("N", "Y"), times = c(6,3)))
)  %>% data.frame

model_list = tableModels[, model]
names(model_list) = c("Rep.", "Dem.", "Party Diff.") %>% rep(3)

table_out = modelsummary(model_list, stars = T, 
                         add_rows = rows,
                         coef_rename = rename_explanator,
                         title = "Effect of Company Casualties on Post-war Partisanship (Census-Linked, Best Matches)",
                         notes = note)  %>%
  add_header_above(c(" " = 1, "Baseline" = 3, "Army Controls" = 3, "Army + Census Controls" = 3))
table_out = data.table(table = table_out, name = "Table 2")
table_list = c(table_list, list(table_out))


rm(table_out)


#effects table b3: ALL and BEST MATCH and Count
tableModels = specs[samples %in% "sample_match_best" &
                      attrition %in% "matched_1874" &
                      ivs %in% "company_kia" & 
                      !is.na(n_i)]

out_table = stargazer(tableModels[, model],
                              keep = c('company_kia'),
                              dep.var.labels = c("Rep.", "Dem.", "Party Diff.") %>% rep(2),
                              covariate.labels = c('Company KIA'),
                              column.separate = c(3,3),
                              column.labels = c("Baseline", "Army Controls"),
                              keep.stat = 'n',
                              label = "tab:indiana_effects_main_all_best_count",
                              add.lines = list(c("Regiment FE", rep("Y", 6)),
                                               c("Army Controls", rep(c("N", "Y"), times = c(3,3))),
                                               c("Census Controls", rep(c("N", "Y"), times = c(6,0)))
                              ),
                              float.env = 'sidewaystable',
                              title = "Effect of Company Casualties on Post-war Partisanship (Best Matches)",
                              type = 'latex', star.cutoffs = c(0.05,0.01,0.001),
                      digits = NA) %>% digits(., digits = 3)

note = paste0("Sample includes men serving in Indiana Regiments who were matched to the 1860 Census and their best match, if any, in the 1874 People's Guides. ",
              "Baseline and control models, respectively, include data on ", get_n(tableModels$n_i), " individual soldiers, serving in ", get_n(tableModels$n_co), " companies, across ", get_n(tableModels$n_reg), " regiments. ",
              "Regiment fixed effects includes a dummy for each group of soldiers who joined a regiment in the same year. ",
              "Observations are weighted by 1 over the number of post-war matches for each unique soldier.",
              "Standard errors are clustered by company.",
              collapse = "")

write_latex(out_table, note, './output/appendix/Table_B3.tex')

rows = rbind(c("Regiment FE", rep("Y", 6)),
             c("Army Controls", rep(c("N", "Y"), times = c(3,3))),
             c("Census Controls", rep(c("N", "Y"), times = c(6,0)))
)  %>% data.frame


model_list = tableModels[, model]
names(model_list) = c("Rep.", "Dem.", "Party Diff.") %>% rep(2)
table_out = modelsummary(model_list, stars = T, 
                         add_rows = rows,
                         coef_rename = rename_explanator,
                         title = "Effect of Company Casualties on Post-war Partisanship (Best Matches)",
                         notes = note)  %>%
  add_header_above(c(" " = 1, "Baseline" = 3, "Army Controls" = 3))
table_out = data.table(table = table_out, name = "Table B3")
table_list = c(table_list, list(table_out))

rm(table_out)


#effects table b4: CENSUS and MATCH and COUNT
#################################################
tableModels = specs[samples %in% "sample_cmatch" &
                      attrition %in% "matched_1874" &
                      ivs %in% "company_kia"]

out_table = stargazer(tableModels[, model],
                      keep = c('company_kia'),
                      dep.var.labels = c("Rep.", "Dem.", "Party Diff.") %>% rep(3),
                      covariate.labels = c('Company KIA'),
                      column.separate = c(3,3,3),
                      column.labels = c("Baseline", "Army Controls", "All Controls"),
                      keep.stat = 'n',
                      label = "tab:indiana_effects_main_census_match_count",
                      add.lines = list(c("Regiment FE", rep("Y", 9)),
                                       c("Army Controls", rep(c("N", "Y"), times = c(3,6))),
                                       c("Census Controls", rep(c("N", "Y"), times = c(6,3)))
                      ),
                      title = "Effect of Company Casualties on Post-war Partisanship (Census-Linked, All Matches)",
                      type = 'latex', star.cutoffs = c(0.05,0.01,0.001),
                      digits = NA,
                      float.env	= "sidewaystable") %>% digits(., digits = 3)

note = paste0("Sample includes men serving in Indiana Regiments who were matched to the 1860 Census and their best match, if any, in the 1874 People's Guides. ",
              "Baseline and control models, respectively, include data on ", get_n(tableModels$n_i), " individual soldiers, serving in ", get_n(tableModels$n_co), " companies, across ", get_n(tableModels$n_reg), " regiments. ",
              "Regiment fixed effects includes a dummy for each group of soldiers who joined a regiment in the same year. ",
              "Observations are weighted by 1 over the number of post-war matches for each unique soldier.",
              "Standard errors are clustered by company.",
              collapse = "")

write_latex(out_table, note, './output/appendix/Table_B4.tex')


rows = rbind(c("Regiment FE", rep("Y", 9)),
             c("Army Controls", rep(c("N", "Y"), times = c(3,6))),
             c("Census Controls", rep(c("N", "Y"), times = c(6,3)))
)  %>% data.frame

model_list = tableModels[, model]
names(model_list) = c("Rep.", "Dem.", "Party Diff.") %>% rep(3)

table_out = modelsummary(model_list, stars = T, 
                         add_rows = rows,
                         coef_rename = rename_explanator,
                         title = "Effect of Company Casualties on Post-war Partisanship (Census-Linked, All Matches)",
                         notes = note)  %>%
  add_header_above(c(" " = 1, "Baseline" = 3, "Army Controls" = 3, "Army + Census Controls" = 3))
table_out = data.table(table = table_out, name = "Table B4")
table_list = c(table_list, list(table_out))

rm(table_out)


#effects table B5: ALL and MATCH and Count
tableModels = specs[samples %in% "sample_match" &
                      attrition %in% "matched_1874" &
                      ivs %in% "company_kia" & 
                      !is.na(n_i)]

out_table = stargazer(tableModels[, model],
                      keep = c('company_kia'),
                      dep.var.labels = c("Rep.", "Dem.", "Party Diff.") %>% rep(2),
                      covariate.labels = c('Company KIA'),
                      column.separate = c(3,3),
                      column.labels = c("Baseline", "Army Controls"),
                      keep.stat = 'n',
                      label = "tab:indiana_effects_main_all_match_count",
                      add.lines = list(c("Regiment FE", rep("Y", 6)),
                                       c("Army Controls", rep(c("N", "Y"), times = c(3,3))),
                                       c("Census Controls", rep(c("N", "Y"), times = c(6,0)))
                      ),
                      title = "Effect of Company Casualties on Post-war Partisanship (All Matches)",
                      type = 'latex', star.cutoffs = c(0.05,0.01,0.001),
                      digits = NA,
                      float.env = 'sidewaystable') %>% digits(., digits = 3)

note = paste0("Sample includes men serving in Indiana Regiments who were matched to the 1860 Census and their best match, if any, in the 1874 People's Guides. ",
              "Baseline and control models, respectively, include data on ", get_n(tableModels$n_i), " individual soldiers, serving in ", get_n(tableModels$n_co), " companies, across ", get_n(tableModels$n_reg), " regiments. ",
              "Regiment fixed effects includes a dummy for each group of soldiers who joined a regiment in the same year. ",
              "Observations are weighted by 1 over the number of post-war matches for each unique soldier.",
              "Standard errors are clustered by company.",
              collapse = "")

write_latex(out_table, note, './output/appendix/Table_B5.tex')


rows = rbind(c("Regiment FE", rep("Y", 6)),
             c("Army Controls", rep(c("N", "Y"), times = c(3,3))),
             c("Census Controls", rep(c("N", "Y"), times = c(6,0)))
)  %>% data.frame

model_list = tableModels[, model]
names(model_list) = c("Rep.", "Dem.", "Party Diff.") %>% rep(2)

table_out = modelsummary(model_list, stars = T, 
                         add_rows = rows,
                         coef_rename = rename_explanator,
                         title = "Effect of Company Casualties on Post-war Partisanship (All Matches)",
                         notes = note)  %>%
  add_header_above(c(" " = 1, "Baseline" = 3, "Army Controls" = 3))
table_out = data.table(table = table_out, name = "Table B5")
table_list = c(table_list, list(table_out))

rm(table_out)


#############################
#Figure B12: Robustness plot#
#############################
get_t = function(m) {
  m %>% tidy %>% .[1, 'statistic']
}

specs[, t_stat := if (!is.na(n_i)) {get_t(model[[1]])} else {as.double(NA)}, by = 1:nrow(specs)]
specs[, attrition_label := ifelse(attrition %in% "all", "All Cases", "Complete Cases")]
specs[, dv_f := factor(dvs, levels = c('party_rep', 'party_dem', 'party_diff'), 
                            labels = c('Republican', "Democrat", "Difference"))]
specs[, ivs := ifelse(ivs %in% "company_kia", "Company KIA", "Co. KIA Rate")]
specs[, cov_labels := factor(covs, levels = c("no_covars", "army_covars", "all_covars"),
                       labels = c("Baseline", "Army", "All"))]

p = ggplot(specs, aes(x = t_stat, color = cov_labels, fill = cov_labels, alpha = ivs)) + 
  geom_vline(xintercept = c(-1.96,1.96), color = 'red') +
  geom_vline(xintercept = c(-1.64,1.64), color = 'red', lty = 2) +
  geom_vline(xintercept = c(0)) +
  geom_histogram(bins = 60) +
  facet_wrap(~ attrition_label + dv_f) + 
  theme_bw() +
  theme(legend.position = "bottom") +
  xlab("Company KIA t-statistic")

ggsave('./output/appendix/Figure_B12.pdf', p, width = 8.5, height = 7)

############
#Figure B13#
############

#Interaction with democratic latent partisanship:#
##################################################

inter_use = cwdbTable[(sample_cmatch) & (sample_inf) & matched_1874] %>%
  .[, wt_1874 := 1 / .N, by = personpk]

inter_use[, latent_dem := 1*(prob_dem >= 0.5)]
inter_use[, latent_nondem := 1*(prob_dem < 0.5)]

inter_use[, complete := !is.na(company_kia)& !is.na(party_diff) & !is.na(prob_dem)]
n_i = inter_use[(complete), personpk] %>% unique %>% length
n_co = inter_use[(complete), cov_co_cl] %>% unique %>% length
n_r = inter_use[(complete), cov_co] %>% unique %>% length


#Specifications
dvs = paste0("party_", c('rep', 'dem', 'diff'))
ivs = c('company_kia*latent_dem', 'company_kia*latent_nondem')
idx = 1:2
fe = c('cov_co', 'cov_co')
cl = c('cov_co_cl', 'cov_co_cl')

specs = data.table(dvs = rep(dvs, each = length(ivs)), ivs, fe, cl, idx)
setkey(specs, idx)

model_list = Map(function(y,x,f,c) do_felm(y, x, fe = f, cl = c, 
                                           data = inter_use, 
                                           w = inter_use$wt_1874),
                 specs$dvs, specs$ivs, specs$fe, specs$cl)

p = model_list %>%
  lapply(tidy) %>% 
  rbindlist %>% 
  .[term %in% "company_kia"] %>%
  .[, list(term = rep(c("Republican", "Democrat", 'Party Diff.'), 2), 
           estimate, std.error, statistic, p.value,
           model = rep(c("Pr(Dem.) < 0.5", "Pr(Dem.) >= 0.5"), each = 3))] %>%
  dwplot(., vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) +
  scale_colour_grey(start = .3, end = .7, name = "Group") +
  xlab("Effect of Company KIA") +
  ylab("Partisanship 1874") +
  theme_bw() +
  theme(legend.position = 'bottom') 

ggsave("./output/appendix/Figure_B13.pdf", p,
    width = 6, height = 4)



note = paste("Marginal effect of Company KIAs on partisan swing, across predicted pre-war partisanship (probability of partisanship greater or less than 0.5), conditional on regiment fixed effects. Sample includes men serving in Indiana Infantry Regiments who were matched to the 1860 Census: ", n_i, " unique soldiers, in " , n_co, " companies, across ", n_r, ' regiments. Individuals are weighted by 1 over the number of 1974 Peoples Guide matches. Standard errors are clustered by company.', sep = "")

names(model_list) = c("Republican", "Democrat", 'Party Diff.') %>% rep(2)
table_out = modelsummary(model_list,
                         coef_rename = c('company_kia' = 'Company KIA', 'company_kia:latent_dem' = "Company KIA x Pr(Dem) >= 0.5", 'company_kia:latent_nondem' = "Company KIA x Pr(Dem) < 0.5",
                                         latent_dem = "Pr(Dem) >= 0.5", latent_nondem = "Pr(Dem) < 0.5"),
                         notes = note,
                         stars = T,
                         title = "Effects of Company Casualties By Predicted Partisanship")

table_out = data.table(table = table_out, name = "Figure B13")
table_list = c(table_list, list(table_out))


############
#Figure B14#
############

#effect over time:
estTable = cwdbTable[sample_cmatch_best & matched_1874 & sample_inf] %>%
  .[, wt_1874 := 1/.N, by = personpk]
estTable[, ey := as.numeric(enlist_year)]
estTable[, post_ep := 1*(enlist_year %in% 1863:1865)]
estTable[, complete := !is.na(company_kia)& !is.na(party_diff) & !is.na(post_ep)]
n_i = estTable[(complete), personpk] %>% unique %>% length
n_co = estTable[(complete), cov_co_cl] %>% unique %>% length
n_r = estTable[(complete), cov_co] %>% unique %>% length

g = inter.binning(estTable[(complete)] ,
                  Y = "party_diff",
                  D = "company_kia", 
                  X = "post_ep",
                  FE = "cov_co",
                  cl = "cov_co_cl",
                  na.rm = T,
                  weights = 'wt_1874',
                  cutoffs = 0.5,
                  Xlabel = "Enlist Year >= 1863",
                  Dlabel = "Company KIA",
                  Ylabel = "Party Diff.",
                  theme.bw = T)

ggsave("./output/appendix/Figure_B14.pdf", g$graph, width = 6, height = 4)

note = paste("Marginal effect of Company KIAs on partisan swing, for soldiers enlisting pre- and post- Emancipation Proclamation, conditional on regiment fixed effects. Sample includes men serving in Indiana Infantry Regiments who were matched to the 1860 Census: ", n_i, " unique soldiers, in " , n_co, " companies, across ", n_r, ' regiments. Individuals are weighted by 1 over the number of 1974 Peoples Guide matches. Standard errors are clustered by company.', sep = "")

m = felm(party_diff ~ company_kia*post_ep | cov_co | 0 | cov_co_cl, data= estTable[(complete)], weights = estTable[(complete),wt_1874]) 

table_out = modelsummary(m,
                         coef_omit = '^post_ep',
                         coef_rename = c('company_kia' = 'Company KIA', 'company_kia:post_ep' = "Company KIA x Post-Emancipation Proclamation"),
                         notes = note,
                         stars = T,
                         title = "Effects of Company Casualties across early and late enlistments")

table_out = data.table(table = table_out, name = "Figure B14")
table_list = c(table_list, list(table_out))

###############
#Odds and Ends#
###############

#evidence that predicted partisanship is meaningful:
inter_use = cwdbTable[(sample_cmatch_best) & (sample_inf) & matched_1874] %>%
  .[, wt_1874 := 1 / .N, by = personpk]

m1 = lm(scale(prob_dem) ~ drafted + substitute, 
     data = inter_use, weights = inter_use$wt_1874) 
m2 = lm(scale(prob_rep) ~ drafted + substitute, 
     data = inter_use, weights = inter_use$wt_1874)
m3 = lm(scale(prob_diff) ~ drafted + substitute, 
     data = inter_use, weights = inter_use$wt_1874) 
m4 = lm(scale(party_dem) ~ drafted + substitute, 
        data = inter_use, weights = inter_use$wt_1874) 
m5 = lm(scale(party_rep) ~ drafted + substitute, 
        data = inter_use, weights = inter_use$wt_1874) 
m6 = lm(scale(party_diff) ~ drafted + substitute, 
     data = inter_use, weights = inter_use$wt_1874) 
  
  stargazer(list(m1,m2,m3,m4,m5,m6),
            se = lapply(list(m1,m2,m3,m4,m5,m6), function(x) x %>% vcovHC %>% diag %>% sqrt),
            type = 'text',
            dep.var.labels = c("Dem.", "Rep.", "Swing", "Dem.", "Rep.", "Swing"), 
            column.separate = c(3,3),
            column.labels = c("Predicted", "Actual"),
            dep.var.caption = "Standardized Partisanship",
            covariate.labels = c("Drafted", "Substitute")
            )

#######################################
#Magnitude of Individual-Level Effects#
#######################################

#create "within" groups
in_veterans_treatment[, enlist_year := as.Date(indate) %>% year]
in_veterans_treatment[, cov_co := paste(regimentpk, enlist_year,term_sort, sep = ":")]
in_veterans_treatment[, cov_co_cl := paste(regimentpk, enlist_year,term_sort, companypk, sep = ":")]

#Get mean company casualties exposure
in_veterans_treatment[ , mean_company_kia := mean(company_kia, na.rm = T) , by = cov_co_cl]

#calculate min/max company casualties by regiment 
in_veterans_treatment[, reg_min := min(mean_company_kia, na.rm = T), by = cov_co]
in_veterans_treatment[, reg_max := max(mean_company_kia, na.rm = T) , by = cov_co]
#calculate sd regiment casualties
in_veterans_treatment[, reg_sd := data.table(cov_co_cl, mean_company_kia) %>% 
                                  unique %>% 
                                  .$mean_company_kia %>%
                                  sd(na.rm = T), by = cov_co]

#Calculate "counterfactual" Partisan Swing
#1.9 ppt swing comes from multiplying coefficient in Table 2, column 3 by 2
shift_sd_down = in_veterans_treatment[ !is.na(company_kia) & !is.na(reg_sd), 1.9 * rowMaxs(cbind(-reg_sd, reg_min - company_kia)) ] %>% mean
shift_sd_up = in_veterans_treatment[!is.na(company_kia) & !is.na(reg_sd), 1.9 * rowMins(cbind(reg_sd, reg_max - company_kia)) ] %>% mean
shift_min = in_veterans_treatment[ !is.na(company_kia) & !is.na(reg_sd), 1.9 * (reg_min - company_kia) ] %>% mean
shift_max = in_veterans_treatment[ !is.na(company_kia) & !is.na(reg_sd), 1.9 * (reg_max - company_kia) ] %>% mean

#surviving Indiana soldier over post-war eligble voters in Indiana 1874 (as per 1870 census)
fraction_vet = (149/380.6)

#Load 1874 congressional elections
cong = read.dta('./data/county_elections/raw/ICPSR_03371/DS0001/03371-0001-Data.dta') %>% as.data.table
in_vote_share = cong[STATE %in% "Indiana" & YEAR %in% "1874"] %>% 
  .[, list(rep = VOTE[PARTY %in% 200],
           dem = VOTE[PARTY %in% 100],
           total = TOTAL[1]
  ), by = DISTRICT]

#Correct 8th District Votes: 
#Republicans awarded this seat using the vote count given below
in_vote_share[DISTRICT %in% 8, c('rep','dem') := list(14005, 13798)]
in_vote_share[, gop_margin := (rep - dem)/total * 100]

#Seats won by Republicans under different "counterfactual" scenarios
in_vote_share[, gop_margin > 0] %>% sum %>% cat("Seats actually won by Republicans:" , ., "out of 13\n")
in_vote_share[, (fraction_vet * shift_sd_down + gop_margin) > 0] %>% sum %>% cat("Seats won by Republicans if casualties 1 (within regiment) sd lower:" , ., "out of 13\n")
in_vote_share[, (fraction_vet * shift_sd_up + gop_margin) > 0] %>% sum %>% cat("Seats won by Republicans if casualties 1 (within regiment) sd higher:" , ., "out of 13\n")
in_vote_share[, (fraction_vet * shift_min + gop_margin) > 0] %>% sum %>% cat("Seats won by Republicans if casualties set to regiment minimum:" , ., "out of 13\n")
in_vote_share[, (fraction_vet * shift_max + gop_margin) > 0] %>% sum %>% cat("Seats won by Republicans if casualties set to regiment maximum:" , ., "out of 13\n")

##############################################################
#Cleanup
rm(list = setdiff(ls(), c(keep, 'keep')))
gc()